library(knitr)
library(shiny)
library(plotly)
## Warning: package 'plotly' was built under R version 3.4.3
library(ggplot2)
library(dplyr)
library(caret)
library(pROC)
## Warning: package 'pROC' was built under R version 3.4.3
Liczba wierszy i kolumn wczytanego zbioru danych:
# Load the raw data set; read.csv() already returns a data.frame.
elektrownie <- read.csv(file = "elektrownie.csv")
## [1] 235790 51
Zastąpienie zerowych wartości średnią danego atrybutu. Tę metodę zastosowałam m. in. w przypadku atrybutów: ciśnienie, odległość, azymut, wysokość.
Zmiana nazw kolumn oraz wyświetlenie liczby wierszy i nazw atrybutów.
# Scatter plot of azimuth and energy (kwh), both drawn against the `data`
# column of elektrownie_wykr so the two series can be compared visually.
kor1 <- plot_ly(
  elektrownie_wykr,
  x = ~data,
  y = ~azimuth,
  name = "azimuth",
  type = "scatter",
  mode = "markers"
) %>%
  add_trace(y = ~kwh, name = "energia", mode = "markers") %>%
  layout(
    title = "Azymut - energia ",
    # The x axis carries the `data` column, not energy, so it is titled
    # after that column (previously mislabelled as 'Energia').
    xaxis = list(title = "Data", zeroline = TRUE),
    yaxis = list(title = "Azymut")
  )
kor1
# Scatter plot of temperature (tempi) and energy (kwh), both drawn against
# the `data` column of elektrownie_wykr.
kor2 <- plot_ly(
  elektrownie_wykr,
  x = ~data,
  y = ~tempi,
  name = "temp",
  type = "scatter",
  mode = "markers"
) %>%
  add_trace(y = ~kwh, name = "energia", mode = "markers") %>%
  layout(
    title = "Temperatura - energia ",
    # The x axis carries the `data` column, not energy, so it is titled
    # after that column (previously mislabelled as 'Energia').
    xaxis = list(title = "Data", zeroline = TRUE),
    yaxis = list(title = "Temperatura")
  )
kor2
# Scatter plot of pressure and energy (kwh), both drawn against the `data`
# column of elektrownie_wykr.
kor3 <- plot_ly(
  elektrownie_wykr,
  x = ~data,
  y = ~pressure,
  name = "pressure",
  type = "scatter",
  mode = "markers"
) %>%
  add_trace(y = ~kwh, name = "energia", mode = "markers") %>%
  layout(
    title = "Cisnienie - energia",
    # The x axis carries the `data` column, not energy, so it is titled
    # after that column (previously mislabelled as 'Energia').
    xaxis = list(title = "Data", zeroline = TRUE),
    # Spelling aligned with the plot title (was 'Ciesnienie').
    yaxis = list(title = "Cisnienie")
  )
kor3
# 3D scatter plot: energy (kwh) on x, hour (ora) on y, distance (dist) on z.
# Marker colour encodes azimuth and a colour scale is shown next to the plot.
p_glowny <- plot_ly(
  elektrownie_wykr,
  x = ~kwh,
  y = ~ora,
  z = ~dist,
  marker = list(
    color = ~azimuth,
    colorscale = c("#FFE1A1", "#683531"),
    showscale = TRUE
  )
) %>%
  add_markers() %>%
  layout(
    scene = list(
      xaxis = list(title = "Energia"),
      yaxis = list(title = "Godzina"),
      zaxis = list(title = "Odleglosc")
    ),
    # Paper-referenced annotation placed just outside the top-right corner of
    # the plot; NOTE(review): presumably this is the colour-bar caption.
    # The colour maps azimuth, so the caption names azimuth
    # (it previously repeated 'Energia', which is the x axis).
    annotations = list(
      x = 1.13,
      y = 1.05,
      text = "Azymut",
      xref = "paper",
      yref = "paper",
      showarrow = FALSE
    )
  )
p_glowny
W celu przeprowadzenia procesu uczenia podzieliłam zbiór danych na zbiór treningowy i testowy w proporcjach 75/25. Pierwszym eksperymentem jest uczenie na zbiorze treningowym z wykorzystaniem algorytmu procesu Gaussa oraz osobno z wykorzystaniem sieci neuronowej. Miara RMSE jest mniejsza w przypadku wykorzystania sieci neuronowej.
Kolejne procesy to uczenie na podstawie większej liczby atrybutów z wykorzystaniem algorytmu procesu Gaussa. W pierwszym z tych przypadków uczenie odbywa się na podstawie temperatury, a w kolejnym - temperatury, położenia i zachmurzenia. Im więcej atrybutów jest branych pod uwagę, tym proces uczenia osiąga lepsze wyniki, jednak znacznie spada tempo uczenia (lub proces się nie kończy).
# Train/test split and resampling configuration ----

# caret documents `initialWindow` as the number of consecutive samples in each
# training slice, so the 75% share is converted to a row count here; the raw
# fraction 0.75 is not a valid window size.
initial_window <- floor(0.75 * nrow(elektrownie_wykr))

# NOTE(review): train_test, myTimeControl and trainPart are not referenced by
# the code visible in this section - confirm whether they are used elsewhere.
train_test <- createTimeSlices(
  y = elektrownie_wykr$idsito,
  initialWindow = initial_window
)

# Growing-window (fixedWindow = FALSE) time-slice control.
# NOTE(review): classProbs = TRUE targets classification models, while the
# models trained on kwh below report RMSE - confirm it is intended.
myTimeControl <- trainControl(
  method = "timeslice",
  initialWindow = initial_window,
  fixedWindow = FALSE,
  allowParallel = TRUE,
  classProbs = TRUE,
  verboseIter = TRUE
)

# Random 75% partition of the rows, returned as a matrix (list = FALSE).
trainPart <- createDataPartition(
  y = elektrownie_wykr$idsito,
  p = .75,
  list = FALSE
)

# Split by row order: first 1896 rows for training, last 631 for testing.
# NOTE(review): 1896 + 631 = 2527; this assumes elektrownie_wykr has exactly
# that many rows, otherwise the two sets overlap or leave a gap - confirm.
training <- head(elektrownie_wykr, 1896)
testing <- tail(elektrownie_wykr, 631)

# Resampling scheme shared by all models below: time slices with an initial
# training window of 20 samples.
ctrl <- trainControl(
  method = "timeslice",
  initialWindow = 20,
  # number of splits
  # NOTE(review): `number` may have no effect for method = "timeslice" - verify.
  number = 2
)
# Baseline model: caret "gaussprLinear" predicting kwh from idsito alone,
# resampled with the shared `ctrl` settings.
set.seed(23)  # fixed seed so repeated runs give the same resampling result
fit <- train(
  kwh ~ idsito,
  method = "gaussprLinear",
  trControl = ctrl,
  data = training
)
fit
## Gaussian Process
##
## 1896 samples
## 1 predictor
##
## No pre-processing
## Resampling: Rolling Forecasting Origin Resampling (1 held-out with a fixed window)
## Summary of sample sizes: 20, 20, 20, 20, 20, 20, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.1463901 NaN 0.1463901
# Same single-predictor task (kwh from idsito) fitted with caret's
# "neuralnet" method, for comparison against the Gaussian-process baseline.
set.seed(23)  # same seed as the baseline run
fitnn <- train(
  kwh ~ idsito,
  method = "neuralnet",
  trControl = ctrl,
  data = training
)
fitnn
## Neural Network
##
## 1896 samples
## 1 predictor
##
## No pre-processing
## Resampling: Rolling Forecasting Origin Resampling (1 held-out with a fixed window)
## Summary of sample sizes: 20, 20, 20, 20, 20, 20, ...
## Resampling results across tuning parameters:
##
## layer1 RMSE Rsquared MAE
## 1 0.01973458666 NaN 0.01973458666
## 3 0.01955238463 NaN 0.01955238463
## 5 0.01829729120 NaN 0.01829729120
##
## Tuning parameter 'layer2' was held constant at a value of 0
##
## Tuning parameter 'layer3' was held constant at a value of 0
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were layer1 = 5, layer2 = 0 and
## layer3 = 0.
# Plot the resampling profile of the neural-network fit with a black-and-white
# theme and hand it to shiny for rendering.
plotnn <- ggplot(fitnn)
plotnn <- plotnn + theme_bw()
renderPlot(plotnn)
# Section caption, then a "gaussprLinear" model using idsito and temperature
# (tempi) as predictors of kwh.
renderText("Trenowanie z parametrami - idsito i temperatura")
set.seed(23)  # same seed as the other model runs
fitTemp <- train(
  kwh ~ idsito + tempi,
  method = "gaussprLinear",
  trControl = ctrl,
  data = training
)
fitTemp
## Gaussian Process
##
## 1896 samples
## 2 predictor
##
## No pre-processing
## Resampling: Rolling Forecasting Origin Resampling (1 held-out with a fixed window)
## Summary of sample sizes: 20, 20, 20, 20, 20, 20, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.1378573947 NaN 0.1378573947
# Section caption, then a "gaussprLinear" model using idsito, azimuth and
# cloud cover as predictors of kwh.
renderText("Trenowanie z parametrami - idsito, azymut i zachmurzenie")
set.seed(23)  # same seed as the other model runs
fitTempHum <- train(
  kwh ~ idsito + azimuth + cloudcover,
  method = "gaussprLinear",
  trControl = ctrl,
  data = training
)
fitTempHum
## Gaussian Process
##
## 1896 samples
## 3 predictor
##
## No pre-processing
## Resampling: Rolling Forecasting Origin Resampling (1 held-out with a fixed window)
## Summary of sample sizes: 20, 20, 20, 20, 20, 20, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.08322094888 NaN 0.08322094888
# Section caption, then a "gaussprLinear" model using idsito, irri_pvgis_mod
# and irri as predictors of kwh.
renderText("Trenowanie z parametrami - idsito, irri_pvgis i irri")
set.seed(23)  # same seed as the other model runs
fitIrri <- train(
  kwh ~ idsito + irri_pvgis_mod + irri,
  method = "gaussprLinear",
  trControl = ctrl,
  data = training
)
fitIrri
## Gaussian Process
##
## 1896 samples
## 3 predictor
##
## No pre-processing
## Resampling: Rolling Forecasting Origin Resampling (1 held-out with a fixed window)
## Summary of sample sizes: 20, 20, 20, 20, 20, 20, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.0668492654 NaN 0.0668492654
# Score the idsito + tempi model on the held-out rows and report
# RMSE / R-squared / MAE against the observed kwh.
gClasses <- predict(object = fitTemp, newdata = testing)
postResample(pred = gClasses, obs = testing$kwh)
## RMSE Rsquared MAE
## 0.01653008910 0.00312355652 0.01325692561
# Score the idsito + azimuth + cloudcover model on the held-out rows and
# report RMSE / R-squared / MAE against the observed kwh.
gClassesTest <- predict(object = fitTempHum, newdata = testing)
postResample(pred = gClassesTest, obs = testing$kwh)
## RMSE Rsquared MAE
## 0.0277250858998 0.0007023057807 0.0181855669656
# Score the idsito + irri_pvgis_mod + irri model on the held-out rows and
# report RMSE / R-squared / MAE against the observed kwh.
gClassesTest <- predict(object = fitIrri, newdata = testing)
postResample(pred = gClassesTest, obs = testing$kwh)
## RMSE Rsquared MAE
## 0.0232193958840 0.0005295241498 0.0137302143081